# -------------------------------------------------------------------
# Purpose: Creates Figure B14
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate both panels of the figure
# -------------------------------------------------------------------
# Check that required paths exist
stopifnot(dir.exists(pdataconfanalysis))
stopifnot(dir.exists(poutputappendix))


## Load data
load(file.path(pdataconfanalysis, "surnameCountyLevel19001940.RData"))

# Left panel: Patents per 1,000 people
temp <- copy(surnameCountyLevel19001940) %>% drop_na(sum_patents_pc_1900_f_w, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws)
temp[, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws := resid(feols(iv_lo_entropy_namelast_mp_adjp_fe_immig_ws ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900^namelast_mp + statefip^year, data = temp, weights = temp$wgt_name))]
temp[, sum_patents_pc_1900_f_w := resid(feols(sum_patents_pc_1900_f_w ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900^namelast_mp + statefip^year, data = temp, weights = temp$wgt_name))]
est1 <- binsreg(sum_patents_pc_1900_f_w, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, data = temp, weights = temp$wgt_name)
result <- est1$data.plot$`Group Full Sample`
p <- ggplot() +
    labs(
        x = "Predicted surname diversity",
        y = NULL,
        subtitle = "Patents per 1,000 people",
    ) +
    geom_point(data = result$data.dots, aes(x = x, y = fit), size = 2, alpha = 1) +
    theme_minimal()
plotname <- file.path(poutputappendix, "figureB14left.pdf")
ggsave(plotname, p, width = 4.6, height = 4.6, units = "in")

cat("Figure saved to:", plotname, "\n")


# Right panel: Breakthrough patents per 1,000 people
temp <- copy(surnameCountyLevel19001940) %>% drop_na(sum_break_p80_rrfsim05_pc_1900_f_w, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws)
temp[, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws := resid(feols(iv_lo_entropy_namelast_mp_adjp_fe_immig_ws ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900^namelast_mp + statefip^year, data = temp, weights = temp$wgt_name))]
temp[, sum_break_p80_rrfsim05_pc_1900_f_w := resid(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900^namelast_mp + statefip^year, data = temp, weights = temp$wgt_name))]
est1 <- binsreg(sum_break_p80_rrfsim05_pc_1900_f_w, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, data = temp, weights = temp$wgt_name)
result <- est1$data.plot$`Group Full Sample`
p <- ggplot() +
    labs(
        x = "Predicted surname diversity",
        y = NULL,
        subtitle = "Breakthrough patents per 1,000 people",
    ) +
    geom_point(data = result$data.dots, aes(x = x, y = fit), size = 2, alpha = 1) +
    theme_minimal()
plotname <- file.path(poutputappendix, "figureB14right.pdf")
ggsave(plotname, p, width = 4.6, height = 4.6, units = "in")

cat("Figure saved to:", plotname, "\n")
